### Rachel Porter
### 12/1/2022
### Replication file for Male/Female Analysis

rm(list=ls())

# load libraries
library("cobalt")
library("WeightIt")
library("ebal")
library("jtools")
library("survey")
library("sensemakr")
library("stargazer")
library("MASS")
library("arm")
library("ggplot2")

# clear environment and set working directory
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")

# load data for male/female democrat analysis
original <- readRDS("male_democrats_data.rds")

# subset data into strategic/no groups for balancing
strategic <- subset(original, original$strategic == "Strategic")
not <- subset(original, original$strategic == "Not")

############################### MALE PROFESSIONAL DEMOCRATS#################################

# main findings; Model 1 with weights, Table A6
w.out.strat <- weightit(women_number ~ quality_cand + openrace + dem_pres_av + 
                    primary_type + year + minority, data = strategic, 
                    estimand = "ATT", method = "ebal")
d.w.strat <- svydesign(ids = ~1, weights = w.out.strat$weights,
                    data = strategic)
fit.strat <- svyglm(narrow_female ~ women_number + quality_cand + openrace + 
                    dem_pres_av + primary_type + year + minority, design = d.w.strat,
                    family=quasibinomial())

# appendix Model 1 without weights, Table A6
fit2 <- glm(narrow_female ~ women_number + quality_cand + openrace + dem_pres_av +
                    primary_type + year + minority, data = strategic, 
                    family=binomial())

# appendix Model 2 with weights, Table A6
w.out.strat3 <- weightit(women_number ~ as.factor(qual_chall) + openrace + dem_pres_av +
                    primary_type + year + minority, data = strategic, estimand = "ATT",
                    method = "ebal")
d.w.strat3 <- svydesign(ids = ~1, weights = w.out.strat$weights,
                    data = strategic)
fit.strat4 <- svyglm(narrow_female ~ women_number + as.factor(qual_chall) + openrace +
                    dem_pres_av + primary_type + year + minority, design = d.w.strat3, 
                    family=quasibinomial())

# appendix Model 2 without weights, Table A6
fit5 <- glm(narrow_female ~ women_number + as.factor(qual_chall) + openrace + 
                    dem_pres_av + primary_type + year + minority, data = strategic, 
                    family=binomial())

# appendix Model 3, Table A6
fit2 <- glm(narrow_female ~ women_number + quality_cand + openrace + dem_pres_av +
                    primary_type + year + minority, data = strategic, 
                    family=binomial())
w.out.strat2 <- weightit(women_number ~ quality_cand + openrace + ss_party + 
                    primary_type + year + minority, data = strategic, 
                    estimand = "ATT", method = "ebal")
d.w.strat2 <- svydesign(ids = ~1, weights = w.out.strat2$weights,
                    data = strategic)
fit.strat3 <- svyglm(narrow_female ~ women_number + quality_cand + openrace + 
                    ss_party + primary_type + year + minority, design = d.w.strat2, 
                    family=quasibinomial())

#################### 
#### APPENDIX TABLE A6
#################### 
stargazer(fit.strat,fit2,fit.strat4,fit5,fit.strat3,
                    star.char = c("*", "*"),
                    star.cutoffs = c(0.05, 0.01))

# Simulating predicted probabilities for Figure 2
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.strat$coef
vcv <- vcov(fit.strat)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.strat$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
                    betas = betas, sim.sd = apply(sim.betas, 2, sd), 
                    se = sqrt(diag(vcv)))   

# Set values for all other variables 
women_number <- c(0,1)
openrace1 <- c(0,0)
`primary_typeOpen Primary` <- c(0,0)
quality_cand2 <- c(0,0)
dem_pres_av <- c(55,55)
year2020 <- c(1,1)
minority1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, women_number, quality_cand2, openrace1, 
                    dem_pres_av, `primary_typeOpen Primary`, year2020, minority1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)
pp.betas

# Compute pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(women_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

# compute first differences for Figure 2
pe_strat <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_strat, prob = .025)
hi <- quantile(pe_strat, prob = .975)

fem_vector <- c(mean(pe_strat), lo, hi)

#################### 
## Love plots, APPENDIX FIGURE 4
#################### 

w.out.strat <- weightit(women_number ~ quality_cand + openrace + 
                        dem_pres_av + primary_type + year + minority,
                        data = strategic, estimand = "ATT", method = "ebal")
w.out.strat2 <- weightit(women_number ~ quality_cand + openrace + dem_pres_av + primary_type + 
                        year + minority,
                        data = strategic, estimand = "ATT", method = "cbps")

new.names.strat = c(quality_cand = "Experienced Candidate", 
                    openrace = "Open Seat", 
                    dem_pres_av = "District Presidential Vote", 
                    `primary_type_Open Primary` = "Open Primary", 
                    year_2020 = "2020 Primary", minority = "Non-White Candidate")

love.plot(women_number ~ quality_cand + openrace + dem_pres_av + primary_type + 
            year + minority,
          data = strategic, estimand = "ATT",
          stats = c("mean.diffs"),
          weights = list(w1 = get.w(w.out.strat),
                         w2 = get.w(w.out.strat2)),
          var.order = "unadjusted",
          abs = TRUE,
          line = FALSE, 
          title = NULL,
          size =  5,
          threshold = c(mean.diffs = .05,
                        ks = .05),
          var.names = new.names.strat,
          colors = c("black", "black", "black"),
          shapes = c("triangle filled", "circle filled", "square filled"),
          sample.names = c("Unweighted", "Entropy Balanced", "CBPS Weighted"),
          limits = list(mean.diffs = c(0, .55),
                        ks = c(0, .55)),
          wrap = 20, grid = FALSE,
          position = "top",
          themes = list(theme(text = element_text(size=20)),
                        theme(text = element_text(size=20))))

#################### 
## Sensitivity Analysis, APPENDIX TABLE A11
#################### 
w.out.strat <- weightit(women_number ~ as.numeric(quality_cand) + openrace + dem_pres_av + primary_type + year + minority, data = strategic, estimand = "ATT", method = "ebal")

d.w.strat <- svydesign(ids = ~1, weights = w.out.strat$weights,
                       data = strategic)
model <- lm(narrow_female ~ women_number + quality_cand + openrace + 
                    dem_pres_av + primary_type + year + minority, 
                    data = strategic, weights = w.out.strat$weights)
question = sensemakr(model, treatment = "women_number",
                     benchmark_covariates = "quality_cand1",
                     kd = c(3,7,11))
ovb_minimal_reporting(question)

#################### 
## Contour Plots, APPENDIX FIGURE 8
#################### 
plot(question, cex.label.text = 1.1, label.bump.x = .08, label.bump.y = .01,
     cex.lab = 1.1)
plot(question, sensitivity.of = "t-value", cex.label.text = 1.1, 
     label.bump.x = .08, label.bump.y = .01, cex.lab = 1.1)

rm(list=setdiff(ls(), c("not", "fem_vector", "pe_strat")))

############################### MALE AMATEUR DEMOCRATS#################################

# main findings; Model 1 with weights, Table A7
w.out.not <- weightit(women_number ~ openrace + dem_pres_av + primary_type + 
                      year + minority,
                      data = not, estimand = "ATT", method = "ebal")
d.w.am <- svydesign(ids = ~1, weights = w.out.not$weights,
                      data = not)
fit.am <- svyglm(narrow_female ~ women_number + openrace + dem_pres_av + 
                      primary_type + year + minority,
                      design = d.w.am, family=quasibinomial())
summ(fit.am, confint = TRUE, model.fit = TRUE, model.info = TRUE, digits = 4) 

# appendix Model 1 without weights, Table A7
fit2 <- glm(narrow_female ~ women_number + openrace + dem_pres_av + primary_type 
                      + year + minority, data = not, family=binomial())

# appendix Model 2, Table A7
w.out.not2 <- weightit(women_number ~ openrace + ss_party + primary_type + 
                      year + minority, data = not, estimand = "ATT", 
                      method = "ebal")
d.w.am2 <- svydesign(ids = ~1, weights = w.out.not2$weights,
                      data = not)
fit.am3 <- svyglm(narrow_female ~ women_number + openrace + ss_party + 
                      primary_type + year + minority,
                      design = d.w.am, family=quasibinomial())
summ(fit.am3, confint = TRUE, model.fit = TRUE, model.info = TRUE, digits = 4) 

#################### 
#### APPENDIX TABLE A7
#################### 
stargazer(fit.am, fit2, fit.am3,
                      star.char = c("*", "*"),
                      star.cutoffs = c(0.05, 0.01))

# Simulating predicted probabilities for Figure 2
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.am$coef
vcv <- vcov(fit.am)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.am$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
                      betas = betas, sim.sd = apply(sim.betas, 2, sd), 
                      se = sqrt(diag(vcv)))   

# Set values for all other variables 
women_number <- c(0,1)
openrace1 <- c(0,0)
dem_pres_av <- c(55,55)
`primary_typeOpen Primary` <- c(1,1)
year2020 <- c(1,1)
minority1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, women_number, openrace1, dem_pres_av, 
                    `primary_typeOpen Primary`, year2020, minority1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(women_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

pe_not <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_not, prob = .025)
hi <- quantile(pe_not, prob = .975)

fem_vector <- rbind.data.frame(fem_vector, c(mean(pe_not), lo, hi))
colnames(fem_vector) <- c("pe", "lo", "hi")

# running ttest discussed in manuscript
t.test(pe_strat, pe_not)

# saving simulations to create Figure 2
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")
saveRDS(fem_vector, file = "figure_2_female.rds")

#################### 
## Love plots, APPENDIX FIGURE 5
#################### 

w.out.not <- weightit(women_number ~ openrace + dem_pres_av + primary_type + year + minority,
                      data = not, estimand = "ATT", method = "ebal")
w.out.not2 <- weightit(women_number ~ openrace + dem_pres_av + primary_type + year + minority,
                       data = not, estimand = "ATT", method = "cbps")

new.names.not = c(openrace = "Open Seat", dem_pres_av = "District Presidential Vote", 
                        `primary_type_Open Primary` = "Open Primary", 
                        year_2020 = "2020 Primary", 
                        minority = "Non-White Candidate")

love.plot(women_number ~ openrace + dem_pres_av + primary_type + year + minority,
                        data = not, estimand = "ATT",
                        stats = c("mean.diffs"),
                        weights = list(w1 = get.w(w.out.not),
                                       w2 = get.w(w.out.not2)),
                        var.order = "unadjusted",
                        abs = TRUE,
                        line = FALSE, 
                        title = NULL,
                        size =  5,
                        threshold = c(mean.diffs = .05,
                                      ks = .05),
                        var.names = new.names.not,
                        colors = c("black", "black", "black"),
                        shapes = c("triangle filled", "circle filled", 
                                   "square filled"),
                        sample.names = c("Unweighted", "Entropy Balanced", 
                                         "CBPS Weighted"),
                        limits = list(mean.diffs = c(0, .55),
                                      ks = c(0, .55)),
                        wrap = 20, grid = FALSE,
                        position = "top",
                        themes = list(theme(text = element_text(size=20)),
                                      theme(text = element_text(size=20))))

#################### 
## Sensitivity Analysis, APPENDIX TABLE A12 
#################### 
model <- lm(narrow_female ~ women_number + openrace + dem_pres_av + 
                        primary_type + year + minority,
                        data = not, weights = w.out.not$weights)
question = sensemakr(model, treatment = "women_number",
                        benchmark_covariates = "minority1",
                        kd = c(3,7,11))
ovb_minimal_reporting(question)

#################### 
## Contour Plots, APPENDIX FIGURE 9
#################### 
plot(question, 
     cex.label.text = 1.1,
     label.bump.x = .08,
     label.bump.y = .01,
     cex.lab = 1.1)

plot(question, sensitivity.of = "t-value", 
     cex.label.text = 1.1,
     label.bump.x = .08,
     label.bump.y = .01,
     cex.lab = 1.1)
